# Bootstrap the Bioconductor installer when it is not yet available
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install GSVA through BiocManager when it is not yet available
if (!requireNamespace("GSVA", quietly = TRUE)) {
  BiocManager::install("GSVA")
}

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.0     v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(GSVA)
library(broom)
## Warning: package 'broom' was built under R version 4.1.2

Outline: 1. Load in the Catenin signature information created from differential gene expression analysis 2. Load in the CCLE RNA-Seq data and the CTRP AUC data 3. Format and perform GSVA to score each CCLE cell line 4. Load in and format the PARP inhibitor and other drug inhibitor information 5. Perform correlation analysis between GSVA signature scores and Drug Inhibitor

1. Load in the Catenin signature information created from differential gene expression analysis

# Locate every .gmx gene-set file; each file contributes one signature.
files <- dir("./Signatures/", full.names = TRUE, pattern = "\\.gmx")

# Name each signature after its file. basename() drops the directory part no
# matter how the platform joined it to the file name, so the names never keep a
# stray path separator; the pattern then removes the ".gmx" extension.
sig.names <- gsub(pattern = "\\.gmx", replacement = "", x = basename(files))

# One list slot per file, filled in below.
signature_list <- vector("list", length(files))

# seq_along() makes the loop a no-op when no files were found
# (1:length(files) would iterate over c(1, 0) and fail).
for (i in seq_along(files)) {
  # skip = 2 drops the first two lines of the file (in the GMX layout these are
  # the gene-set name and description rows), leaving only gene symbols.
  signature_list[[i]] <- scan(file = files[[i]], what = "character", skip = 2)
  print(signature_list[[i]])
  # Also expose each gene set as a standalone variable named after its file.
  assign(sig.names[[i]], value = signature_list[[i]])
}
##   [1] "ABL1"     "AMD1"     "ARID4A"   "ATF5"     "ATRX"     "AURKA"   
##   [7] "AURKB"    "BARD1"    "BCL3"     "BIRC5"    "BRCA2"    "BUB1"    
##  [13] "BUB3"     "CASP8AP2" "CBX1"     "CCNA2"    "CCNB2"    "CCND1"   
##  [19] "CCNF"     "CCNT1"    "CDC20"    "CDC25A"   "CDC25B"   "CDC27"   
##  [25] "CDC45"    "CDC6"     "CDC7"     "CDK1"     "CDK4"     "CDKN1B"  
##  [31] "CDKN2C"   "CDKN3"    "CENPA"    "CENPE"    "CENPF"    "CHAF1A"  
##  [37] "CHEK1"    "CHMP1A"   "CKS1B"    "CKS2"     "CTCF"     "CUL1"    
##  [43] "CUL3"     "CUL4A"    "CUL5"     "DBF4"     "DDX39A"   "DKC1"    
##  [49] "DMD"      "DR1"      "DTYMK"    "E2F1"     "E2F2"     "E2F3"    
##  [55] "E2F4"     "EFNA5"    "EGF"      "ESPL1"    "EWSR1"    "EXO1"    
##  [61] "EZH2"     "FANCC"    "FBXO5"    "FOXN3"    "G3BP1"    "GINS2"   
##  [67] "GSPT1"    "H2AX"     "H2AZ1"    "H2AZ2"    "H2BC12"   "HIF1A"   
##  [73] "HIRA"     "HMGA1"    "HMGB3"    "HMGN2"    "HMMR"     "HNRNPD"  
##  [79] "HNRNPU"   "HOXC10"   "HSPA8"    "HUS1"     "ILF3"     "INCENP"  
##  [85] "JPT1"     "KATNA1"   "KIF11"    "KIF15"    "KIF20B"   "KIF22"   
##  [91] "KIF23"    "KIF2C"    "KIF4A"    "KIF5B"    "KMT5A"    "KNL1"    
##  [97] "KPNA2"    "KPNB1"    "LBR"      "LIG3"     "LMNB1"    "MAD2L1"  
## [103] "MAP3K20"  "MAPK14"   "MARCKS"   "MCM2"     "MCM3"     "MCM5"    
## [109] "MCM6"     "MEIS1"    "MEIS2"    "MKI67"    "MNAT1"    "MT2A"    
## [115] "MTF2"     "MYBL2"    "MYC"      "NASP"     "NCL"      "NDC80"   
## [121] "NEK2"     "NOLC1"    "NOTCH2"   "NSD2"     "NUMA1"    "NUP50"   
## [127] "NUP98"    "NUSAP1"   "ODC1"     "ODF2"     "ORC5"     "ORC6"    
## [133] "PAFAH1B1" "PBK"      "PDS5B"    "PLK1"     "PLK4"     "PML"     
## [139] "POLA2"    "POLE"     "POLQ"     "PRC1"     "PRIM2"    "PRMT5"   
## [145] "PRPF4B"   "PTTG1"    "PTTG3P"   "PURA"     "RACGAP1"  "RAD21"   
## [151] "RAD23B"   "RAD54L"   "RASAL2"   "RBL1"     "RBM14"    "RPA2"    
## [157] "RPS6KA5"  "SAP30"    "SFPQ"     "SLC12A2"  "SLC38A1"  "SLC7A1"  
## [163] "SLC7A5"   "SMAD3"    "SMARCC1"  "SMC1A"    "SMC2"     "SMC4"    
## [169] "SNRPD1"   "SQLE"     "SRSF1"    "SRSF10"   "SRSF2"    "SS18"    
## [175] "STAG1"    "STIL"     "STMN1"    "SUV39H1"  "SYNCRIP"  "TACC3"   
## [181] "TENT4A"   "TFDP1"    "TGFB1"    "TLE3"     "TMPO"     "TNPO2"   
## [187] "TOP1"     "TOP2A"    "TPX2"     "TRA2B"    "TRAIP"    "TROAP"   
## [193] "TTK"      "UBE2C"    "UBE2S"    "UCK2"     "UPF1"     "WRN"     
## [199] "XPO1"     "YTHDC1"  
##  [1] "BLM"    "BRCA2"  "EME1"   "MRE11A" "MUS81"  "NBN"    "POLD1"  "POLD2" 
##  [9] "POLD3"  "POLD4"  "RAD50"  "RAD51"  "RAD51B" "RAD51C" "RAD51D" "RAD52" 
## [17] "RAD54B" "RAD54L" "RPA1"   "RPA2"   "RPA3"   "RPA4"   "SHFM1"  "SSBP1" 
## [25] "TOP3A"  "TOP3B"  "XRCC2"  "XRCC3" 
##   [1] "ABRAXAS1" "AHCTF1"   "ANAPC1"   "ANAPC10"  "ANAPC11"  "ANAPC15" 
##   [7] "ANAPC16"  "ANAPC2"   "ANAPC4"   "ANAPC5"   "ANAPC7"   "ATM"     
##  [13] "ATR"      "ATRIP"    "AURKB"    "B9D2"     "BABAM1"   "BABAM2"  
##  [19] "BARD1"    "BIRC5"    "BLM"      "BRCA1"    "BRCC3"    "BRIP1"   
##  [25] "BUB1"     "BUB1B"    "BUB3"     "CCNA1"    "CCNA2"    "CCNB1"   
##  [31] "CCNB2"    "CCNE1"    "CCNE2"    "CDC16"    "CDC20"    "CDC23"   
##  [37] "CDC25A"   "CDC25C"   "CDC26"    "CDC27"    "CDC45"    "CDC6"    
##  [43] "CDC7"     "CDCA8"    "CDK1"     "CDK2"     "CDKN1A"   "CDKN1B"  
##  [49] "CDKN2A"   "CENPA"    "CENPC"    "CENPE"    "CENPF"    "CENPH"   
##  [55] "CENPI"    "CENPK"    "CENPL"    "CENPM"    "CENPN"    "CENPO"   
##  [61] "CENPP"    "CENPQ"    "CENPS"    "CENPT"    "CENPU"    "CHEK1"   
##  [67] "CHEK2"    "CKAP5"    "CLASP1"   "CLASP2"   "CLIP1"    "CLSPN"   
##  [73] "COP1"     "DBF4"     "DNA2"     "DSN1"     "DYNC1H1"  "DYNC1I1" 
##  [79] "DYNC1I2"  "DYNC1LI1" "DYNC1LI2" "DYNLL1"   "DYNLL2"   "ERCC6L"  
##  [85] "EXO1"     "GTSE1"    "H2AX"     "H2BC1"    "H2BC10"   "H2BC11"  
##  [91] "H2BC12"   "H2BC13"   "H2BC14"   "H2BC15"   "H2BC17"   "H2BC21"  
##  [97] "H2BC3"    "H2BC4"    "H2BC5"    "H2BC6"    "H2BC7"    "H2BC8"   
## [103] "H2BC9"    "H2BS1"    "H2BU1"    "H3-4"     "H4-16"    "H4C1"    
## [109] "H4C11"    "H4C12"    "H4C13"    "H4C14"    "H4C15"    "H4C2"    
## [115] "H4C3"     "H4C4"     "H4C5"     "H4C6"     "H4C8"     "H4C9"    
## [121] "HERC2"    "HUS1"     "INCENP"   "ITGB3BP"  "KAT5"     "KIF18A"  
## [127] "KIF2A"    "KIF2B"    "KIF2C"    "KNL1"     "KNTC1"    "MAD1L1"  
## [133] "MAD2L1"   "MAPRE1"   "MCM10"    "MCM2"     "MCM3"     "MCM4"    
## [139] "MCM5"     "MCM6"     "MCM7"     "MCM8"     "MDC1"     "MDM2"    
## [145] "MDM4"     "MIS12"    "MRE11"    "NBN"      "NDC80"    "NDE1"    
## [151] "NDEL1"    "NSD2"     "NSL1"     "NUDC"     "NUF2"     "NUP107"  
## [157] "NUP133"   "NUP160"   "NUP37"    "NUP43"    "NUP85"    "NUP98"   
## [163] "ORC1"     "ORC2"     "ORC3"     "ORC4"     "ORC5"     "ORC6"    
## [169] "PAFAH1B1" "PCBP4"    "PHF20"    "PIAS4"    "PKMYT1"   "PLK1"    
## [175] "PMF1"     "PPP1CC"   "PPP2CA"   "PPP2CB"   "PPP2R1A"  "PPP2R1B" 
## [181] "PPP2R5A"  "PPP2R5B"  "PPP2R5C"  "PPP2R5D"  "PPP2R5E"  "PSMA1"   
## [187] "PSMA2"    "PSMA3"    "PSMA4"    "PSMA5"    "PSMA6"    "PSMA7"   
## [193] "PSMA8"    "PSMB1"    "PSMB10"   "PSMB11"   "PSMB2"    "PSMB3"   
## [199] "PSMB4"    "PSMB5"    "PSMB6"    "PSMB7"    "PSMB8"    "PSMB9"   
## [205] "PSMC1"    "PSMC2"    "PSMC3"    "PSMC4"    "PSMC5"    "PSMC6"   
## [211] "PSMD1"    "PSMD10"   "PSMD11"   "PSMD12"   "PSMD13"   "PSMD14"  
## [217] "PSMD2"    "PSMD3"    "PSMD4"    "PSMD5"    "PSMD6"    "PSMD7"   
## [223] "PSMD8"    "PSMD9"    "PSME1"    "PSME2"    "PSME3"    "PSME4"   
## [229] "PSMF1"    "RAD1"     "RAD17"    "RAD50"    "RAD9A"    "RAD9B"   
## [235] "RANBP2"   "RANGAP1"  "RBBP8"    "RCC2"     "RFC2"     "RFC3"    
## [241] "RFC4"     "RFC5"     "RHNO1"    "RMI1"     "RMI2"     "RNF168"  
## [247] "RNF8"     "RPA1"     "RPA2"     "RPA3"     "RPS27"    "RPS27A"  
## [253] "SEC13"    "SEH1L"    "SEM1"     "SFN"      "SGO1"     "SGO2"    
## [259] "SKA1"     "SKA2"     "SPC24"    "SPC25"    "SPDL1"    "SUMO1"   
## [265] "TAOK1"    "TOP3A"    "TOPBP1"   "TP53"     "TP53BP1"  "UBA52"   
## [271] "UBB"      "UBC"      "UBE2C"    "UBE2D1"   "UBE2E1"   "UBE2N"   
## [277] "UBE2S"    "UBE2V2"   "UIMC1"    "WEE1"     "WRN"      "XPO1"    
## [283] "YWHAB"    "YWHAE"    "YWHAG"    "YWHAH"    "YWHAQ"    "YWHAZ"   
## [289] "ZNF385A"  "ZW10"     "ZWILCH"   "ZWINT"   
##   [1] "ACD"      "ANKRD28"  "ATRX"     "BLM"      "CCNA1"    "CCNA2"   
##   [7] "CDK2"     "CENPA"    "CENPC"    "CENPH"    "CENPI"    "CENPK"   
##  [13] "CENPL"    "CENPM"    "CENPN"    "CENPO"    "CENPP"    "CENPQ"   
##  [19] "CENPS"    "CENPT"    "CENPU"    "CENPW"    "CENPX"    "CHTF18"  
##  [25] "CHTF8"    "CTC1"     "DAXX"     "DKC1"     "DNA2"     "DSCC1"   
##  [31] "FEN1"     "GAR1"     "H2AB1"    "H2AC14"   "H2AC18"   "H2AC19"  
##  [37] "H2AC20"   "H2AC4"    "H2AC6"    "H2AC7"    "H2AC8"    "H2AJ"    
##  [43] "H2AX"     "H2AZ1"    "H2AZ2"    "H2BC1"    "H2BC10"   "H2BC11"  
##  [49] "H2BC12"   "H2BC13"   "H2BC14"   "H2BC15"   "H2BC17"   "H2BC21"  
##  [55] "H2BC3"    "H2BC4"    "H2BC5"    "H2BC6"    "H2BC7"    "H2BC8"   
##  [61] "H2BC9"    "H2BS1"    "H2BU1"    "H3-3A"    "H3-3B"    "H3-4"    
##  [67] "H4-16"    "H4C1"     "H4C11"    "H4C12"    "H4C13"    "H4C14"   
##  [73] "H4C15"    "H4C2"     "H4C3"     "H4C4"     "H4C5"     "H4C6"    
##  [79] "H4C8"     "H4C9"     "HJURP"    "ITGB3BP"  "KNL1"     "LIG1"    
##  [85] "MIS18A"   "MIS18BP1" "NHP2"     "NOP10"    "NPM1"     "OIP5"    
##  [91] "PCNA"     "PIF1"     "POLA1"    "POLA2"    "POLD1"    "POLD2"   
##  [97] "POLD3"    "POLD4"    "POLR2A"   "POLR2B"   "POLR2C"   "POLR2D"  
## [103] "POLR2E"   "POLR2F"   "POLR2G"   "POLR2H"   "POLR2I"   "POLR2J"  
## [109] "POLR2K"   "POLR2L"   "POT1"     "PPP6C"    "PPP6R3"   "PRIM1"   
## [115] "PRIM2"    "RBBP4"    "RBBP7"    "RFC1"     "RFC2"     "RFC3"    
## [121] "RFC4"     "RFC5"     "RPA1"     "RPA2"     "RPA3"     "RSF1"    
## [127] "RTEL1"    "RUVBL1"   "RUVBL2"   "SHQ1"     "SMARCA5"  "STN1"    
## [133] "TEN1"     "TERF1"    "TERF2"    "TERF2IP"  "TERT"     "TINF2"   
## [139] "WRAP53"   "WRN"     
##  [1] "ATM"      "ATR"      "ATRIP"    "BARD1"    "BLM"      "BRCA1"   
##  [7] "BRCA2"    "BRIP1"    "CHEK1"    "DNA2"     "EME1"     "EME2"    
## [13] "EXO1"     "GEN1"     "HUS1"     "KAT5"     "MRE11"    "MUS81"   
## [19] "NBN"      "PALB2"    "PCNA"     "POLD1"    "POLD2"    "POLD3"   
## [25] "POLD4"    "POLE"     "POLE2"    "POLE3"    "POLE4"    "POLH"    
## [31] "POLK"     "RAD1"     "RAD17"    "RAD50"    "RAD51"    "RAD51AP1"
## [37] "RAD51B"   "RAD51C"   "RAD51D"   "RAD9A"    "RAD9B"    "RBBP8"   
## [43] "RFC1"     "RFC2"     "RFC3"     "RFC4"     "RFC5"     "RHNO1"   
## [49] "RMI1"     "RMI2"     "RPA1"     "RPA2"     "RPA3"     "RPS27A"  
## [55] "RTEL1"    "SLX1A"    "SLX1B"    "SLX4"     "SPIDR"    "TOP3A"   
## [61] "TOPBP1"   "UBA52"    "UBB"      "UBC"      "WRN"      "XRCC2"   
## [67] "XRCC3"   
##   [1] "ABL1"     "ABRAXAS1" "ATM"      "ATR"      "ATRIP"    "BABAM1"  
##   [7] "BABAM2"   "BARD1"    "BLM"      "BRCA1"    "BRCA2"    "BRCC3"   
##  [13] "BRIP1"    "CCNA1"    "CCNA2"    "CDK2"     "CHEK1"    "CLSPN"   
##  [19] "DNA2"     "EME1"     "EME2"     "ERCC1"    "ERCC4"    "EXO1"    
##  [25] "FEN1"     "GEN1"     "H2AX"     "H2BC1"    "H2BC10"   "H2BC11"  
##  [31] "H2BC12"   "H2BC13"   "H2BC14"   "H2BC15"   "H2BC17"   "H2BC21"  
##  [37] "H2BC3"    "H2BC4"    "H2BC5"    "H2BC6"    "H2BC7"    "H2BC8"   
##  [43] "H2BC9"    "H2BS1"    "H2BU1"    "H3-4"     "H4-16"    "H4C1"    
##  [49] "H4C11"    "H4C12"    "H4C13"    "H4C14"    "H4C15"    "H4C2"    
##  [55] "H4C3"     "H4C4"     "H4C5"     "H4C6"     "H4C8"     "H4C9"    
##  [61] "HERC2"    "HUS1"     "KAT5"     "LIG3"     "MDC1"     "MRE11"   
##  [67] "MUS81"    "NBN"      "NSD2"     "PALB2"    "PARP1"    "PARP2"   
##  [73] "PCNA"     "PIAS4"    "POLD1"    "POLD2"    "POLD3"    "POLD4"   
##  [79] "POLE"     "POLE2"    "POLE3"    "POLE4"    "POLH"     "POLK"    
##  [85] "POLQ"     "PPP4C"    "PPP4R2"   "RAD1"     "RAD17"    "RAD50"   
##  [91] "RAD51"    "RAD51AP1" "RAD51B"   "RAD51C"   "RAD51D"   "RAD52"   
##  [97] "RAD9A"    "RAD9B"    "RBBP8"    "RFC1"     "RFC2"     "RFC3"    
## [103] "RFC4"     "RFC5"     "RHNO1"    "RMI1"     "RMI2"     "RNF168"  
## [109] "RNF4"     "RNF8"     "RPA1"     "RPA2"     "RPA3"     "RPS27A"  
## [115] "RTEL1"    "SIRT6"    "SLX1A"    "SLX1B"    "SLX4"     "SPIDR"   
## [121] "SUMO1"    "SUMO2"    "TIMELESS" "TIPIN"    "TOP3A"    "TOPBP1"  
## [127] "TP53BP1"  "UBA52"    "UBB"      "UBC"      "UBE2I"    "UBE2N"   
## [133] "UBE2V2"   "UIMC1"    "WRN"      "XRCC1"    "XRCC2"    "XRCC3"
# Label each gene set with the name derived from its source file
signature_list <- setNames(signature_list, sig.names)
# Read the cluster 6 table, skipping the first two lines of the file
CAT.signatures <- read_tsv("Signatures/Cluster_6_HGNC_Reformat.gct", skip = 2)
## Rows: 899 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (1): NAME
## dbl (3): WT, CAT, CATTCF
## lgl (1): description
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

2. Load in CCLE RNA-Seq data and CTRP AUC data

## CCLE data
# Expression values are log(TPM) with a pseudo-count of 1
CCLE_Expresssion_raw <- read_csv("../drug-gene-correlation/data_in/CCLE_expression_21Q3.csv")
## New names:
## * `` -> ...1
## Rows: 1377 Columns: 19178
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr     (1): ...1
## dbl (19177): TSPAN6 (7105), TNMD (64102), DPM1 (8813), SCYL3 (57147), C1orf1...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The unnamed first column (auto-named `...1` on import) becomes depmap_id
CCLE <- CCLE_Expresssion_raw %>%
  rename(depmap_id = `...1`)

# Reduce headers such as "TSPAN6 (7105)" to the text before the parenthesis
colnames(CCLE) <- gsub(" \\(.*\\)", "", colnames(CCLE))




### CTRP data
# Drug sensitivity (AUC) table, one row per cell line
CTRP_AUCs_raw <- read_csv("../drug-gene-correlation/data_in/Drug_sensitivity_AUC_(CTD^2).csv")
## Rows: 815 Columns: 549
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr   (6): depmap_id, cell_line_display_name, lineage_1, lineage_2, lineage_...
## dbl (543): CIL56 (CTRP:100490), FQI-1 (CTRP:102193), BRD-K92856060 (CTRP:119...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Discard columns 3 through 6 by position
# (presumably the lineage annotations; verify against the input file)
CTRP <- select(CTRP_AUCs_raw, -c(3:6))

# Reduce headers such as "CIL56 (CTRP:100490)" to the text before the parenthesis
names(CTRP) <- gsub(" \\(.*", "", colnames(CTRP))

# Preview the top-left corner of each table
CCLE[1:10, 1:10]
CTRP[1:10, 1:10]

3. Format and perform GSVA to score each CCLE cell line

(The information below is taken from the GSVA package vignette on Bioconductor.)

To use the gsva() function from the GSVA package, we need:

  1. A normalized gene expression dataset, which can be provided as a matrix of expression values with genes corresponding to rows and samples corresponding to columns.

  2. A collection of gene sets, which can be provided in one of the following containers: a list object where each element corresponds to a gene set defined by a vector of gene identifiers, and the element names correspond to the names of the gene sets; or a GeneSetCollection object (see package GSEABase).

TL;DR: Get a gene expression matrix (rows = genes, columns = samples), preferably continuous so that the default parameters of GSVA can be used (our data is in log(TPM) units, so the defaults will work). Get the list of signatures from the signatures loaded previously.

### CCLE RNA-Seq data needs to be transposed (genes in rows, samples in columns)

# Transpose everything except the ID column, then label the resulting columns
# with the DepMap IDs
CCLE.matrix <- t(as.matrix(CCLE[, -1]))
colnames(CCLE.matrix) <- CCLE$depmap_id

CCLE.matrix[1:10, 1:10]
##          ACH-001113 ACH-001289 ACH-001339 ACH-001538 ACH-000242 ACH-000708
## TSPAN6   4.99050111  5.2098432 3.77925972   5.726831  7.4656480 4.91408610
## TNMD     0.00000000  0.5459684 0.00000000   0.000000  0.0000000 0.17632277
## DPM1     7.27370237  7.0706040 7.34642509   7.086189  6.4354619 6.94684781
## SCYL3    2.76553475  2.5385382 2.33913738   2.543496  2.4141355 2.57773093
## C1orf112 4.48026512  3.5109619 4.25474520   3.102658  3.8649290 3.85399565
## FGR      0.02856915  0.0000000 0.05658353   0.000000  0.8318772 0.00000000
## CFH      1.26903315  0.1763228 1.33913738   5.914565  7.1980030 0.08406426
## FUCA2    3.05831650  3.8369340 6.72424098   6.099716  5.4525295 4.85549144
## GCLC     6.48317085  4.2008496 3.67129337   4.475733  7.1124916 4.93404465
## NFYA     5.05398017  3.8328900 3.77505054   4.294253  4.7109442 3.68144927
##          ACH-000327 ACH-000233 ACH-000461 ACH-000705
## TSPAN6   4.03298242 0.09761080 4.71259578 5.10139795
## TNMD     0.00000000 0.00000000 0.00000000 0.00000000
## DPM1     5.80658184 5.91910168 6.40633258 6.30997649
## SCYL3    1.94860085 3.98367769 2.24792751 2.36176836
## C1orf112 2.68481874 3.73335434 3.03210084 4.28021400
## FGR      0.01435529 0.02856915 0.02856915 0.02856915
## CFH      3.11769504 6.11124002 0.09761080 0.20163386
## FUCA2    5.97750890 2.96347412 5.52857132 2.54349588
## GCLC     3.65306002 3.41548827 6.38370429 6.12660124
## NFYA     3.01435529 4.82068956 3.97361128 4.72683122
# The transposed matrix looks good now: genes in rows, cell lines in columns
# The signature list is already a named list of gene sets (one character vector
# per set), so it needs no further formatting before scoring
summary(signature_list)
##                                                   Length Class  Mode     
## HALLMARK_G2M_CHECKPOINT                           200    -none- character
## KEGG_HOMOLOGOUS_RECOMBINATION                      28    -none- character
## REACTOME_CELL_CYCLE_CHECKPOINTS                   292    -none- character
## REACTOME_CHROMOSOME_MAINTENANCE                   140    -none- character
## REACTOME_HDR_THROUGH_HOMOLOGOUS_RECOMBINATION_HRR  67    -none- character
## REACTOME_HOMOLOGY_DIRECTED_REPAIR                 138    -none- character
# Score every gene set in every cell line using gsva()'s default settings
# NOTE(review): this is the gsva(expr, gset.idx.list) calling convention; newer
# GSVA releases appear to expect a parameter object (e.g. gsvaParam()) instead —
# confirm against the installed GSVA version before upgrading
gsva.es <- gsva(expr = CCLE.matrix, gset.idx.list = signature_list)
## Estimating GSVA scores for 6 gene sets.
## Estimating ECDFs with Gaussian kernels
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |======================================================================| 100%
# Preview the scores: gene sets in rows, cell lines in columns
gsva.es[1:6, 1:10]
##                                                    ACH-001113  ACH-001289
## HALLMARK_G2M_CHECKPOINT                           0.177042138  0.15312930
## KEGG_HOMOLOGOUS_RECOMBINATION                     0.232633476  0.04283740
## REACTOME_CELL_CYCLE_CHECKPOINTS                   0.262253837  0.31857392
## REACTOME_CHROMOSOME_MAINTENANCE                   0.176103776  0.15390750
## REACTOME_HDR_THROUGH_HOMOLOGOUS_RECOMBINATION_HRR 0.005450199 -0.10976603
## REACTOME_HOMOLOGY_DIRECTED_REPAIR                 0.156876405  0.07452092
##                                                    ACH-001339  ACH-001538
## HALLMARK_G2M_CHECKPOINT                            0.11021614 -0.20487388
## KEGG_HOMOLOGOUS_RECOMBINATION                     -0.08785730 -0.51578937
## REACTOME_CELL_CYCLE_CHECKPOINTS                    0.32727314 -0.08061565
## REACTOME_CHROMOSOME_MAINTENANCE                    0.01238752 -0.26645397
## REACTOME_HDR_THROUGH_HOMOLOGOUS_RECOMBINATION_HRR -0.03859679 -0.34113017
## REACTOME_HOMOLOGY_DIRECTED_REPAIR                 -0.08782913 -0.33774251
##                                                   ACH-000242  ACH-000708
## HALLMARK_G2M_CHECKPOINT                           0.22510576  0.07377926
## KEGG_HOMOLOGOUS_RECOMBINATION                     0.18086885 -0.12627387
## REACTOME_CELL_CYCLE_CHECKPOINTS                   0.10001459  0.06155121
## REACTOME_CHROMOSOME_MAINTENANCE                   0.01855736  0.24130181
## REACTOME_HDR_THROUGH_HOMOLOGOUS_RECOMBINATION_HRR 0.23323746 -0.08872943
## REACTOME_HOMOLOGY_DIRECTED_REPAIR                 0.06286437  0.12780339
##                                                   ACH-000327  ACH-000233
## HALLMARK_G2M_CHECKPOINT                           -0.5623455 -0.08213882
## KEGG_HOMOLOGOUS_RECOMBINATION                     -0.4899335  0.03643954
## REACTOME_CELL_CYCLE_CHECKPOINTS                   -0.5115081 -0.08089407
## REACTOME_CHROMOSOME_MAINTENANCE                   -0.3557128  0.13661285
## REACTOME_HDR_THROUGH_HOMOLOGOUS_RECOMBINATION_HRR -0.5317104 -0.09477485
## REACTOME_HOMOLOGY_DIRECTED_REPAIR                 -0.3594899  0.16636407
##                                                   ACH-000461  ACH-000705
## HALLMARK_G2M_CHECKPOINT                           0.37647500 -0.02841722
## KEGG_HOMOLOGOUS_RECOMBINATION                     0.25157641  0.36450313
## REACTOME_CELL_CYCLE_CHECKPOINTS                   0.15012117  0.04685506
## REACTOME_CHROMOSOME_MAINTENANCE                   0.36489455  0.02013530
## REACTOME_HDR_THROUGH_HOMOLOGOUS_RECOMBINATION_HRR 0.01090514  0.03405479
## REACTOME_HOMOLOGY_DIRECTED_REPAIR                 0.19813566  0.17764985

4. Format the Drug inhibitor information

Get the meta-data for CTRP drugs (MOA information) and then CCLE meta data (cell line origin)

# Flip the GSVA scores to one row per cell line, moving the DepMap IDs from the
# row names into a DepMap_ID column so they can be joined with CCLE metadata
gsva_ctrp <- gsva.es %>%
  t() %>%
  as_tibble(rownames = "DepMap_ID")

# Cell line annotations to join onto the GSVA scores
CCLE_meta_data <- read_csv("../drug-gene-correlation/data_in/sample_info.csv")
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 1825 Columns: 26
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (19): DepMap_ID, cell_line_name, stripped_cell_line_name, CCLE_Name, ali...
## dbl  (6): COSMICID, Achilles_n_replicates, cell_line_NNMD, cas9_activity, WT...
## lgl  (1): depmap_public_comments
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the cell line annotations to the GSVA scores. right_join() keeps every
# scored cell line even when it has no annotation row. The key is stated
# explicitly so the join cannot silently change if the two tables ever come to
# share another column name.
gsva_ccl.meta <- CCLE_meta_data %>%
  select(DepMap_ID, cell_line_name, primary_disease, Subtype, lineage,
         lineage_subtype, lineage_sub_subtype) %>%
  right_join(gsva_ctrp, by = "DepMap_ID")
## Joining, by = "DepMap_ID"
# Reshape the CTRP AUC table to long form (one row per cell line / drug pair),
# then annotate it with the drug metadata from Alex's review
CTRP_long <- pivot_longer(
  CTRP,
  cols = 3:ncol(CTRP),
  names_to = "Drug",
  values_to = "AUCs"
)

CTRP_meta_data <- readxl::read_xlsx("../drug-gene-correlation/data_in/Harmonized_Compound_Data.xlsx")

# Only the CTRPv2 rows of the metadata are matched, by compound name
CTRP_with_meta <- left_join(
  x = CTRP_long,
  y = CTRP_meta_data %>%
    filter(Dataset == "CTRPv2") %>%
    select(Compound_Name_in_Dataset, Compound_MOA, Compound_Molecular_Targets),
  by = c("Drug" = "Compound_Name_in_Dataset")
)


# Merge the GSVA scores and cell line annotations onto the annotated AUC table
drug_gsva_final_data <- CTRP_with_meta %>%
  left_join(gsva_ccl.meta, by = c("depmap_id" = "DepMap_ID"))


drug_gsva_final_data[1:10, ]

5. Perform correlation analysis between GSVA signature scores and Drug Inhibitor

# Drugs whose molecular targets or mechanism of action mention PARP or WNT
drugs_of_interest <- CTRP_with_meta %>%
  filter(grepl("PARP|WNT", Compound_Molecular_Targets) |
           grepl("PARP|WNT", Compound_MOA)) %>%
  pull(Drug) %>%
  unique()

drugs_of_interest
## [1] "CHIR-99021" "olaparib"   "CCT036477"  "veliparib"  "JW-74"
# For each PARP/WNT drug, compute the Spearman correlation between its AUC
# values and every GSVA signature score. Rows missing the AUC or any signature
# score are dropped first. all_of() makes it explicit that sig.names is an
# external character vector of column names rather than a column itself.
drug_gsva_final_data %>%
  filter(grepl(x = Compound_Molecular_Targets, pattern = "PARP|WNT") |
           grepl(x = Compound_MOA, pattern = "PARP|WNT")) %>%
  group_by(Drug) %>%
  drop_na(AUCs, any_of(sig.names)) %>%
  summarise(across(all_of(sig.names), ~ cor(AUCs, .x, method = "spearman")))

Here is another way of doing it to get the p-values as well

# Same correlation as above, but via cor.test() so that p-values are available.
# `count` records how many complete cell lines each drug contributes; it is
# added to the grouping so that it survives summarise().
drug_gsva_final_data %>%
  filter(grepl(x = Compound_Molecular_Targets, pattern = "PARP|WNT") |
           grepl(x = Compound_MOA, pattern = "PARP|WNT")) %>%
  group_by(Drug) %>%
  drop_na(AUCs, any_of(sig.names)) %>%
  mutate(count = n()) %>%
  group_by(Drug, count) %>%
  # tidy() turns each test into a one-row tibble, stored per signature column
  summarise(
    across(all_of(sig.names), ~ tidy(cor.test(AUCs, .x, method = "spearman"))),
    .groups = "drop_last"
  ) %>%
  # all_of() selects by the external name vector without the ambiguity note
  pivot_longer(cols = all_of(sig.names), names_to = "Gene_Set", values_to = "nested_tibbles") %>%
  unnest(cols = nested_tibbles) %>%
  select(Drug, Gene_Set, n_cell_lines = count, spearman_R = estimate, p.value) %>%
  arrange(p.value)
## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(sig.names)` instead of `sig.names` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

6. Turn that mess into a function so we can do this for any drug list or any subset of cell lines

# Subset the combined drug/GSVA table to the chosen drugs and cell lines.
#
# Arguments:
#   data_table  table with at least the columns Drug, cell_line_name, AUCs and
#               the signature score columns
#   drugs       values of Drug to keep
#   cell_lines  values of cell_line_name to keep
#   signatures  names of the signature score columns; rows with a missing value
#               in any of those that exist in data_table are dropped
#
# Returns the filtered table, still grouped by Drug, with an added `count`
# column holding the number of remaining rows for that drug.
gsva_filtering_function <- function(data_table = drug_gsva_final_data,
                                    drugs = drugs_of_interest,
                                    cell_lines = cell_lines_of_interest,
                                    signatures = sig.names) {
  data_table %>%
    filter(Drug %in% drugs,
           cell_line_name %in% cell_lines) %>%
    group_by(Drug) %>%
    # Use the `signatures` argument (the global sig.names was used before, so
    # the argument had no effect)
    drop_na(AUCs, any_of(signatures)) %>%
    mutate(count = n())
}

# By default, every cell line name present in the combined table is eligible
cell_lines_of_interest <- unique(drug_gsva_final_data$cell_line_name)

# Run the filter with all of its default arguments and print the result
gsva_filtering_function()
# Spearman-correlate AUCs with each signature score, per drug.
#
# Arguments:
#   gsva_filtering_output  table with the columns Drug, count, AUCs and the
#                          signature score columns (the shape returned by
#                          gsva_filtering_function())
#   signatures             names of the signature score columns to test
#
# Returns one row per drug / gene set pair with the columns Drug, Gene_Set,
# n_cell_lines, spearman_R and p.value, sorted by ascending p.value.
#
# NOTE(review): the default `gsva_filtered` is not defined in the visible part
# of this file, so the table must be supplied explicitly (e.g. by piping it in).
gsva_correlation_function <- function(gsva_filtering_output = gsva_filtered, signatures = sig.names) {
  gsva_filtering_output %>%
    group_by(Drug, count) %>%
    # tidy() turns each test into a one-row tibble, stored per signature column;
    # all_of() treats `signatures` strictly as an external vector of names
    summarise(
      across(all_of(signatures), ~ tidy(cor.test(AUCs, .x, method = "spearman"))),
      .groups = "drop_last"
    ) %>%
    pivot_longer(cols = all_of(signatures), names_to = "Gene_Set", values_to = "nested_tibbles") %>%
    unnest(cols = nested_tibbles) %>%
    select(Drug, Gene_Set, n_cell_lines = count, spearman_R = estimate, p.value) %>%
    arrange(p.value)
}

7. Come up with some graphing options

# Scatter of GSVA score against AUC for every gene set / drug combination.
# The filtered table is ungrouped and reshaped to one row per gene set;
# all_of() selects the signature columns by the external name vector.
gsva_filtering_function() %>%
  ungroup() %>%
  pivot_longer(cols = all_of(sig.names), names_to = "gene_set", values_to = "GSVA") %>%
  ggplot(mapping = aes(x = AUCs, y = GSVA)) +
  geom_point() +
  facet_grid(gene_set ~ Drug)

# The same scatter restricted to olaparib, one panel per gene set
gsva_filtering_function() %>%
  ungroup() %>%
  pivot_longer(cols = all_of(sig.names), names_to = "gene_set", values_to = "GSVA") %>%
  filter(Drug == "olaparib") %>%
  ggplot(mapping = aes(x = AUCs, y = GSVA)) +
  geom_point() +
  facet_wrap(~gene_set)

# Single gene set for olaparib, with a linear fit and the Spearman statistic
gsva_filtering_function() %>%
  filter(Drug == "olaparib") %>%
  ggplot(mapping = aes(x = HALLMARK_G2M_CHECKPOINT, y = AUCs)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggpubr::stat_cor(method = "spearman")
## `geom_smooth()` using formula 'y ~ x'

8. Make a graphing function

# Scatter plot of one signature score against AUC for a single drug, with a
# linear fit and the Spearman correlation annotated.
#
# Arguments:
#   dataset   table with the columns Drug, AUCs, cell_line_name and the
#             signature score column named by `gene_set`
#   drug      value of Drug to plot
#   gene_set  name (string) of the signature score column for the x axis
#   color     "none" for uncoloured points, or a character vector of
#             cell_line_name values to highlight; highlighted points are
#             labelled "T-ALL Cell Line" and all others "Other"
#
# Returns a ggplot object.
gsva_graphing_function <- function(dataset, drug, gene_set, color = "none") {

  gene_set_var <- sym(gene_set)

  # identical() gives a single TRUE/FALSE even when `color` is a vector of
  # several cell line names; `color == "none"` would be a vector condition
  highlight <- !identical(color, "none")

  plot_data <- dataset %>%
    filter(Drug == drug)

  if (highlight) {
    plot_data <- plot_data %>%
      mutate(colored_cell_lines = if_else(cell_line_name %in% color, "T-ALL Cell Line", "Other"))
    point_layer <- geom_point(mapping = aes(color = colored_cell_lines))
  } else {
    point_layer <- geom_point()
  }

  scatter <- ggplot(plot_data, mapping = aes(x = !!gene_set_var, y = AUCs)) +
    point_layer +
    geom_smooth(method = "lm") +
    ggpubr::stat_cor(method = "spearman") +
    labs(title = paste(drug, gene_set, sep = "::"))

  if (highlight) {
    # Named values pin each colour to its label even when only one of the two
    # labels is present in the data
    scatter <- scatter +
      scale_color_manual(values = c("Other" = "black", "T-ALL Cell Line" = "blue"))
  }

  scatter
}

# Draw one olaparib scatter plot per gene set. The filtered table does not
# depend on the gene set, so it is computed once instead of once per iteration.
# Iterating over the names directly is also safe when sig.names is empty.
olaparib_plot_input <- gsva_filtering_function()

for (gene_set_name in sig.names) {
  # print() is needed because plots are not auto-printed inside a for loop
  print(
    gsva_graphing_function(olaparib_plot_input, drug = "olaparib", gene_set = gene_set_name)
  )
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

9. Analysis: All CTRP Cell Lines

# Drugs whose molecular targets or mechanism of action mention PARP or WNT
drugs_of_interest <- CTRP_with_meta %>%
  filter(grepl("PARP|WNT", Compound_Molecular_Targets) |
           grepl("PARP|WNT", Compound_MOA)) %>%
  pull(Drug) %>%
  unique()

# Use every cell line name present in the combined table
cell_lines_of_interest <- unique(drug_gsva_final_data$cell_line_name)

# Filter with the defaults set above, then correlate AUCs with each signature
gsva_correlation_function(gsva_filtering_function())
## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(signatures)` instead of `signatures` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
# One olaparib scatter plot per signature in sig.names.
# seq_along() gives an empty sequence when sig.names is empty, whereas
# 1:length() would iterate over c(1, 0) and index past the vector.
for (i in seq_along(sig.names)) {
  gsva_filtering_function() %>%
    gsva_graphing_function(drug = "olaparib", gene_set = sig.names[i]) %>%
    print()
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

10. Leukemia Cell Lines

# Drugs whose molecular-target or mechanism-of-action annotation mentions
# PARP or WNT.
drugs_of_interest <- CTRP_with_meta %>%
  filter(
    grepl("PARP|WNT", Compound_Molecular_Targets) |
      grepl("PARP|WNT", Compound_MOA)
  ) %>%
  distinct(Drug) %>%
  pull(Drug)

# Cell lines whose primary_disease is "Leukemia".
cell_lines_of_interest <- drug_gsva_final_data %>%
  filter(primary_disease == "Leukemia") %>%
  distinct(cell_line_name) %>%
  pull(cell_line_name)

# NOTE(review): gsva_filtering_function() is called without arguments, so it
# presumably reads the two selections above from the global environment --
# confirm against its definition.
gsva_correlation_function(gsva_filtering_function())
## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties
# One olaparib scatter plot per signature in sig.names.
# seq_along() gives an empty sequence when sig.names is empty, whereas
# 1:length() would iterate over c(1, 0) and index past the vector.
for (i in seq_along(sig.names)) {
  gsva_filtering_function() %>%
    gsva_graphing_function(drug = "olaparib", gene_set = sig.names[i]) %>%
    print()
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

11. ALL

# Drugs whose molecular-target or mechanism-of-action annotation mentions
# PARP or WNT.
drugs_of_interest <- CTRP_with_meta %>%
  filter(
    grepl("PARP|WNT", Compound_Molecular_Targets) |
      grepl("PARP|WNT", Compound_MOA)
  ) %>%
  distinct(Drug) %>%
  pull(Drug)

# Cell lines whose lineage_subtype is "ALL".
cell_lines_of_interest <- drug_gsva_final_data %>%
  filter(lineage_subtype == "ALL") %>%
  distinct(cell_line_name) %>%
  pull(cell_line_name)

# NOTE(review): gsva_filtering_function() is called without arguments, so it
# presumably reads the two selections above from the global environment --
# confirm against its definition.
gsva_correlation_function(gsva_filtering_function())
## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties
# One olaparib scatter plot per signature in sig.names.
# seq_along() gives an empty sequence when sig.names is empty, whereas
# 1:length() would iterate over c(1, 0) and index past the vector.
for (i in seq_along(sig.names)) {
  gsva_filtering_function() %>%
    gsva_graphing_function(drug = "olaparib", gene_set = sig.names[i]) %>%
    print()
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

12. T-ALL

# Drugs whose molecular-target or mechanism-of-action annotation mentions
# PARP or WNT.
drugs_of_interest <- CTRP_with_meta %>%
  filter(
    grepl("PARP|WNT", Compound_Molecular_Targets) |
      grepl("PARP|WNT", Compound_MOA)
  ) %>%
  distinct(Drug) %>%
  pull(Drug)

# Cell lines with lineage_subtype "ALL" and lineage_sub_subtype "t_cell".
cell_lines_of_interest <- drug_gsva_final_data %>%
  filter(
    lineage_subtype == "ALL",
    lineage_sub_subtype == "t_cell"
  ) %>%
  distinct(cell_line_name) %>%
  pull(cell_line_name)

# NOTE(review): gsva_filtering_function() is called without arguments, so it
# presumably reads the two selections above from the global environment --
# confirm against its definition.
gsva_correlation_function(gsva_filtering_function())
# One olaparib scatter plot per signature in sig.names.
# seq_along() gives an empty sequence when sig.names is empty, whereas
# 1:length() would iterate over c(1, 0) and index past the vector.
for (i in seq_along(sig.names)) {
  gsva_filtering_function() %>%
    gsva_graphing_function(drug = "olaparib", gene_set = sig.names[i]) %>%
    print()
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

13. T-ALL from Stephen

#Stephen gave me a list of cell lines, but they were not needed because Stephen agreed with the T-ALL classifications I found

# xlsx_stephen_cell_lines <-  readxl::read_xlsx(path = "./GDSC - Cell Lines Metadata_TALL.xlsx")
# stephen_cell_lines <- xlsx_stephen_cell_lines$`Sample Name`
# stephen_cell_lines <- tolower(stephen_cell_lines) 
# stephen_cell_lines <- gsub(pattern = "[[:punct:]]", replacement = "", x = stephen_cell_lines)


# Drugs whose molecular-target or mechanism-of-action annotation mentions
# PARP or WNT.
drugs_of_interest <- CTRP_with_meta %>%
  filter(
    grepl("PARP|WNT", Compound_Molecular_Targets) |
      grepl("PARP|WNT", Compound_MOA)
  ) %>%
  distinct(Drug) %>%
  pull(Drug)

# Cell lines whose primary_disease is "Leukemia".
cell_lines_of_interest <- drug_gsva_final_data %>%
  filter(primary_disease == "Leukemia") %>%
  distinct(cell_line_name) %>%
  pull(cell_line_name)

# Cell lines to highlight in the plots: lineage_subtype "ALL" with
# lineage_sub_subtype "t_cell".
tall_color <- drug_gsva_final_data %>%
  filter(
    lineage_subtype == "ALL",
    lineage_sub_subtype == "t_cell"
  ) %>%
  distinct(cell_line_name) %>%
  pull(cell_line_name)

# NOTE(review): gsva_filtering_function() is called without arguments, so it
# presumably reads drugs_of_interest / cell_lines_of_interest from the global
# environment -- confirm against its definition.
gsva_correlation_function(gsva_filtering_function())
## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties

## Warning in cor.test.default(AUCs, ., method = "spearman"): Cannot compute exact
## p-value with ties
# One olaparib scatter plot per signature in sig.names, passing tall_color as
# the set of cell lines to highlight.
# seq_along() gives an empty sequence when sig.names is empty, whereas
# 1:length() would iterate over c(1, 0) and index past the vector.
for (i in seq_along(sig.names)) {
  gsva_filtering_function() %>%
    gsva_graphing_function(
      drug = "olaparib",
      gene_set = sig.names[i],
      color = tall_color
    ) %>%
    print()
}
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used
## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'

# One scatter plot per (signature, drug) pair, passing tall_color as the set of
# cell lines to highlight.
# seq_along() keeps either loop from running when its vector is empty, whereas
# 1:length() would iterate over c(1, 0) and index past the vector.
for (i in seq_along(sig.names)) {
  for (j in seq_along(drugs_of_interest)) {
    gsva_filtering_function() %>%
      gsva_graphing_function(
        drug = drugs_of_interest[j],
        gene_set = sig.names[i],
        color = tall_color
      ) %>%
      print()
  }
}
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used
## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'
## Warning in if (color == "none") {: the condition has length > 1 and only the
## first element will be used

## `geom_smooth()` using formula 'y ~ x'

Notes: The correlations weaken (none remain significant) once the analysis is restricted to only the leukemia cell lines

May need to repeat the GSVA for every subset of CCLs that I use

T-ALL cell lines in Broad/CCLE names don’t quite match (11/16)

GDSC data still needs to be used